package WebPAC::Output::Estraier;

use warnings;
use strict;

use base qw/WebPAC::Common/;

use Search::Estraier 0.06;
use Encode qw/from_to/;
use Data::Dumper;
use LWP;
use URI::Escape;
use List::Util qw/first/;

=head1 NAME

WebPAC::Output::Estraier - Create Hyper Estraier full text index

=head1 VERSION

Version 0.12

=cut

our $VERSION = '0.12';

=head1 SYNOPSIS

Create full text index using Hyper Estraier index from data with
type C<search>.

=head1 FUNCTIONS

=head2 new

Connect to Hyper Estraier index using HTTP

 my $est = new WebPAC::Output::Estraier(
 	masterurl => 'http://localhost:1978/',
	user => 'admin',
	passwd => 'admin',
	database => 'demo',
	label => 'node label',
	encoding => 'iso-8859-2',
	clean => 1,
 );

Options are:

=over 4

=item masterurl

URI to C<estmaster> node

=item user

C<estmaster> user with sufficient rights

=item passwd

password for user

=item database

name of database from which data comes

=item label

label for node (optional)

=item encoding

character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
(and it probably is). This encoding will be converted to C<UTF-8> for
Hyper Estraier.

=back

Name of database will be used to form URI of documents in index.

=cut

sub new {
	my $class = shift;
	my $self = {@_};
	bless($self, $class);

	my $log = $self->_get_logger;

	#$log->debug("self: ", sub { Dumper($self) });

	foreach my $p (qw/masterurl user passwd database/) {
		$log->logdie("need $p") unless ($self->{$p});
	}

	$self->{encoding} ||= 'ISO-8859-2';

	my $url = $self->{masterurl} . '/node/' . $self->{database};
	$self->{url} = $url;

	$self->{label} ||= "WebPAC $self->{database}";

	$self->{db} = Search::Estraier::Node->new(
		url => $url,
		user => $self->{user},
		passwd => $self->{passwd},
		debug => $self->{debug},
		create => 1,
		label => $self->convert( $self->{label} ),
	);

	$log->info("using ", $self->{clean} ? "new " : "", "index $self->{url} '$self->{label}' with encoding $self->{encoding}");

	if ($self->{clean}) {
		$log->debug("clean $self->{database}");
		$self->master( action => 'nodeclr', name => $self->{database} );
	} else {
		$log->debug("opening index $self->{url}");
	}

	$self ? return $self : return undef;
}


=head2 add

Adds one entry to database.

  $est->add(
  	id => 42,
	ds => $ds,
	type => 'display',
	text => 'optional text from which snippet is created',
  );

This function will create  entries in index using following URI format:

  C<file:///type/database%20name/000>

Each tag in C<data_structure> with specified C<type> will create one
attribute and corresponding hidden text (used for search).

=cut

sub add {
	my $self = shift;

	my $args = {@_};

	my $log = $self->_get_logger;

	my $database = $self->{'database'} || $log->logconfess('no database in $self');
	$log->logconfess('need db in object') unless ($self->{'db'});

	foreach my $p (qw/id ds type/) {
		$log->logdie("need $p") unless ($args->{$p});
	}

	my $type = $args->{'type'};
	my $id = $args->{'id'};

	my $uri = "file:///$type/$database/$id";
	$log->debug("creating $uri");

	my $doc = Search::Estraier::Document->new;
	$doc->add_attr('@uri', $self->convert($uri) );

	$log->debug("ds = ", sub { Dumper($args->{'ds'}) } );

	# filter all tags which have type defined
	my @tags = grep {
		ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
	} keys %{ $args->{'ds'} };

	$log->debug("tags = ", join(",", @tags));

	return unless (@tags);

	foreach my $tag (@tags) {

		my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });

		next if (! $vals);

		$vals = $self->convert( $vals ) or
			$log->logdie("can't convert '$vals' to UTF-8");

		$doc->add_attr( $tag, $vals );
		$doc->add_hidden_text( $vals );
	}

	my $text = $args->{'text'};
	if ( $text ) {
		$text = $self->convert( $text ) or
			$log->logdie("can't convert '$text' to UTF-8");
		$doc->add_text( $text );
	}

	$log->debug("adding ", sub { $doc->dump_draft } );
	$self->{'db'}->put_doc($doc) || $log->warn("can't add document $uri with draft " . $doc->dump_draft . " to node " . $self->{url} . " status: " . $self->{db}->status());

	return 1;
}

=head2 add_link

  $est->add_link(
  	from => 'ps',
	to => 'webpac2',
	credit => 10000,
  );

=cut

sub add_link {
	my $self = shift;

	my $args = {@_};
	my $log = $self->_get_logger;

	foreach my $p (qw/from to credit/) {
		$log->logdie("need $p") unless ($args->{$p});
	}

	my $node = first { $_->{name} eq $args->{to} } $self->master( action => 'nodelist' );

	if (! $node) {
		$log->warn("can't find node $args->{to}, skipping link creaton");
		return;
	}

	my $label = $node->{label};

	if (! $label) {
		$log->warn("can't find label for $args->{to}, skipping link creaton");
		return;
	}

	$log->debug("using label $label for $args->{to}");

	return $self->{db}->set_link(
		$self->{masterurl} . '/node/' . $args->{to},
		$label,
		$args->{credit},
	);
}


=head2 master

Issue administrative commands to C<estmaster> process. See documentation for
C<master> in L<Search::Estraier>::Node.

  $self->master(
  	action => 'nodeclr',
	name => 'foobar',
  );

=cut

sub master {
	my $self = shift;
	$self->{db}->master( @_ );
}


=head2 convert

 my $utf8_string = $self->convert('string in codepage');

=cut

sub convert {
	my $self = shift;

	my $text = shift || return;
	from_to($text, $self->{encoding}, 'UTF-8');
	return $text;
}

=head1 AUTHOR

Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>

=head1 COPYRIGHT & LICENSE

Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

1; # End of WebPAC::Output::Estraier
